################################################################################
# Pooling the Polls - Netherlands
# Author: Tom Louwerse
# Based on code from Simon Jackman
# Date: 11 June 2012
# MAIN MODEL
# Last modified: 
# - June 2021: Adapted for 2021 parliament
# - September 2023: Adapted for introduction of NSC
# - November 2023: Adapted for allowing quicker random walk during campaign
################################################################################


################################################################################
# PRELIMINARIES
################################################################################

# Read libraries
library(rjags)        # Requires JAGS to be installed
library(colorspace)
library(dplyr)      
library(car)            
library(foreach)  
library(doParallel)
library(coda)


################################################################################
# DATA PREPARATION
################################################################################

load("sourcedata.RData")

################################################################################
# RUN MODEL
################################################################################

# Function for running model with multiple house effects
runmodel_mhe = function(dat, margin=rep(0.0005,nrow(dat)), outcome=0,n.adapt=1e5,n.iter=5e5,thin=1e2,electionDate=as.Date("2021-03-17"),bugfile="Modellen/current_model.bug", 
                        start=NA,end=NA, endall=NA, shockdates = NA) {
  
  library(rjags)    # CRAN v4-10    
  library(coda)     # CRAN v4-10
  
  # Recode houseperiod, so that minimum is always 1
  dat$houseperiod <- as.integer(dat$houseperiod) - 
    min(as.integer(dat$houseperiod)) + 1
  
  # Start transition
  startTransition = 2
  if(!is.na(start)) {
    startTransition <- as.numeric(julian(start, origin = electionDate))
  }
  
  campaignStart <- as.numeric(julian(as.Date("2023-11-08"), origin = electionDate))
  
  # End transition
  # For parties that stop existing during a term, transitions stop per the date of the last poll
  # but alpha will continue to be set at zero, for purposes of subsequent plotting etc
  if(!is.na(end)) {
    if(julian(end, origin=as.Date("2010-01-01")) > max(dat$datenum)) end = as.Date(max(dat$datenum), origin="2010-01-01")
    NPERIODS = length(julian(electionDate, origin=as.Date("2010-01-01")):julian(end, origin=as.Date("2010-01-01")))
    alpha=c(outcome, 
            rep(NA,length(julian(electionDate, origin=as.Date("2010-01-01")):
                            julian(end, origin=as.Date("2010-01-01"))-1)),
            rep(0,length((julian(end, origin=as.Date("2010-01-01"))+1):
                           julian(endall, origin=as.Date("2010-01-01"))-1)))
  } else {
    NPERIODS = length(julian(electionDate, origin=as.Date("2010-01-01")):max(dat$datenum))
    alpha=c(outcome, 
            rep(NA,length(julian(electionDate, 
                                 origin=as.Date("2010-01-01")):max(dat$datenum))-1))
  }
  
  
  
  foo <- list(y=dat$percentage,
              vari=dat$var,
              date=as.numeric(dat$datenum - 
                                julian(electionDate, 
                                       origin=as.Date("2010-01-01")) + 1),
              houseperiod=dat$houseperiod,
              org=as.integer(as.factor(as.character(dat$bedrijf))),
              margin=margin,
              NHOUSE=length(unique(dat$bedrijf)),
              NPOLLS=length(dat$percentage),
              NPERIODS=NPERIODS,
              NHOUSEPERIODS=length(unique(dat$houseperiod)),
              STARTTRANSITION=startTransition,
              CAMPAIGNSTART=campaignStart,
              alpha=alpha)
  
  
  # Define weights for pollsters
  house_names <- sort(unique(dat$bedrijf))
  house_weights <- matrix(1, foo$NHOUSE, foo$NHOUSEPERIODS)
  
  if("Kantar" %in% house_names & foo$NHOUSEPERIODS > 1) {
    kantar_which <- which(house_names == "Kantar")
    house_weights[kantar_which,2:foo$NHOUSEPERIODS] <- 0 # set weight for kantar post-June 2022 to 0
  }
  # if(dat$party[1] == "NSC") {
  #   ipsos_which <- which(house_names == "Ipsos/1V")
  #   house_weights[ipsos_which,1:foo$NHOUSEPERIODS] <- 0 # set weight for kantar post-June 2022 to 0
  # }
  # if(foo$NHOUSEPERIODS == 3) {
  #   period3_which <- which(house_names %in% c("EenVandaag", "Peil"))
  #   print(period3_which)
  #   house_weights[period3_which,3] <- 0 # set weight for other pollsters to 0 in period 3
  # }
  
  
  # if("LISS" %in% house_names) {
  #   liss_which <- which(house_names == "LISS")
  #   
  #   # set weights for periods 1-4 (bit complicated, because ofnew parties)
  #   if(foo$NHOUSEPERIODS == 5) house_weights[liss_which,1] <- 0 
  #   if(foo$NHOUSEPERIODS >= 4) house_weights[liss_which,foo$NHOUSEPERIODS-3] <- 0 
  #   if(foo$NHOUSEPERIODS >= 3) house_weights[liss_which,foo$NHOUSEPERIODS-2] <- 0 
  #   if(foo$NHOUSEPERIODS >= 2) house_weights[liss_which,foo$NHOUSEPERIODS-1] <- 0 
  #   #if(foo$NHOUSEPERIODS >= 1) house_weights[liss_which,foo$NHOUSEPERIODS] <- 0
  # }
  
  house_weights <- house_weights / matrix(colSums(house_weights), 
                                          foo$NHOUSE, 
                                          foo$NHOUSEPERIODS, 
                                          byrow=TRUE)
  foo$house_weights <- house_weights
  print(house_weights)
  
  # Parties that were not polled before a certain date, are set to 0 beforehand
  if(!is.na(start)) {
    # setToZero <- julian(electionDate, origin=as.Date("2010-01-01")):
    #   julian(start, origin=as.Date("2010-01-01")) -
    #   (julian(electionDate, origin=as.Date("2010-01-01"))) + 1
    
    setToZero <- 1:(julian(start, origin=electionDate))
    
    foo$alpha[setToZero]  <- 0
  }
  
  
  shockEvents <- function(x, start, end, shocklength=1, origin=as.Date("2010-01-01")) {
    x_num <- julian(x, origin=origin) 
    end_num <- julian(end, origin=origin) 
    start_num <- julian(start, origin=origin)
    
    x_num <- x_num[which(x_num <= end_num)]
    
    x_num <- x_num - start_num + 1
    end_num <- end_num - start_num + 1
    
    x_num <- sort(x_num)
    y <- rep(1, length(1:end_num))
    
    for(ii in 1:shocklength) {
      y[x_num + ii - 1] <- (1:length(x_num)) + 1  
    }
    return(y)
  }
  
  # shockdates = as.Date("2023-08-21")
  shocks <- shockEvents(shockdates, electionDate, max(dat$date), shocklength=1)
  
  foo$shocknumber <- shocks
  foo$NSHOCKS <- max(foo$shocknumber)
  
  
  inits <- list(y2=dat$percentage, .RNG.name = "base::Mersenne-Twister", .RNG.seed=20212021)
  
  model = rjags::jags.model(bugfile, foo, n.adapt=n.adapt, inits=inits)
  
  est = rjags::coda.samples(model, c("alpha", "sigma", "house", "shocksize", "sigmacampaign"), n.iter=n.iter, thin=thin)
  return(est)
}

# Full run ----------------------------------------------------------------
cat("Start analysis\n")
flush.console()



workerCount=min(7, as.integer(Sys.getenv("NUMBER_OF_PROCESSORS"))) 
w <- makeCluster(workerCount)
registerDoParallel(w)

outputs <- foreach(i=1:length(sourcedata)) %dopar%
              runmodel_mhe(dat=sourcedata[[i]]$dat, 
                           sourcedata[[i]]$dat$margin,
                           outcome=sourcedata[[i]]$outcome, 
                           n.adapt=1e4,n.iter=4e5,thin=160, 
                           electionDate=as.Date("2021-03-17"),
                           start=sourcedata[[i]]$start,
                           end=sourcedata[[i]]$end,
                           endall=sourcedata[[i]]$endall,
                           bugfile=sourcedata[[i]]$model,
                           shockdates = as.Date("2023-08-21"))
save(outputs, file="outputs.RData")

#normal 4e5 iterations, thin=160; testing 1e4 iterations, thin=2

stopCluster(w)



# Basic output ------------------------------------------------------------

partylist <- sapply(sourcedata, function(q) q$partyname)

getData <- function(X) {
  alpha.pos <- grep("^alpha",colnames(X[[1]]))
  alpha.last.pos <- grep(paste("^alpha\\[", max(alpha.pos),"\\]", sep=""),
                         colnames(X[[1]]))
  x <- X[[1]][,alpha.last.pos]
  return(c(mean(x),quantile(x,c(0.025,.975))))
}

alldata <- foreach(i=1:length(outputs), .combine=rbind) %do% {
  getData(outputs[[i]])
}

rownames(alldata) <- partylist
colnames(alldata) <- c("Percentage", "Margin_Low", "Margin_High")

# Tabel met inschatting meest recente scores
print(round(alldata * 100, 1))


# Huiseffecten voor party party_number
party_number = 1
house = outputs[[party_number]][[1]][,grep("^house\\[",dimnames(outputs[[party_number]][[1]])[[2]])]
house.Results = as.data.frame(t(apply(house, 2, function(x)c(mean(x),quantile(x,c(0.025,.975)))))) * 100
colnames(house.Results) <- c("Mean", "Low", "High")

# Namen bedrijven
companies <- sort(unique(sourcedata[[i]]$dat$bedrijf))

rns <- rownames(house.Results)  
houses <- gsub("house\\[(.),(.)\\]", "\\1", rns)
periods <- gsub("house\\[.,(.)\\]", "\\1", rns)
house_names <- companies[as.integer(houses)]

# Print formatted table with house effects for this party (in %)
data.frame(house.Results, company=house_names, houseperiod=as.numeric(periods)) |>
  filter(!is.na(company))




